This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
This version of beta calculation is used for the theoretical method/results of the paper. For reference: the last update was Dec 2018 for an R notebook. This R notebook is an updated and more sophisticated version.
Constants used are listed at the start of this script.
setwd("D:/R program")
The working directory was changed to D:/R program inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
source("inspack.R")

#values needed
K= 1.38064852*(10)^-23 #m2 kg/ s2 K boltzmann constant
mu= 1.126*(10)^-3 #kg/m s dynamic viscosity in 18C
v= 1.099*(10)^-6 #m2/s kinematic viscosity in 18C
Reh_calc= 2.3E-6 #in m radius Ehux
Reh_naked= 1.8E-6 #in m radius Ehux
Reh_lith = 1.3E-6 #in m radius
Rehv= 90*(10)^-9 #in m radius virus
Temp = 18+273.15 #temp in kelvin, here assuming 18C
Den_OcM = 1.05 #g/cm3 density organic cell matter
Den_CH2O= 1.025 #g/cm3 density seawater at 18C
CcNum <- 0.9*((10)^3)
NcNum <- 0.1*((10)^3)
ViNum <- (CcNum+NcNum)*10
LiNum <- CcNum*10
Calculating for Brownian motion
#Brownian motion (BM)
#1. make a data frame
BM <- data.frame (group=as.factor(c("Nc", "Cc", "Cc", "Li", "Li")), group2 =c("Nc", "Cc-Mc", "Cc-Hc", "Li-Mc", "Li-Hc"), rad= c(Reh_naked, Reh_calc, Reh_calc, Reh_lith, Reh_lith))
#2. calculate beta (beta)
BM$beta_BM <- ((2*(K*(10)^4)*Temp*(((BM$rad+Rehv)*100)^2))/((3*mu*10)*(BM$rad*Rehv*1e4)))*86400 #cm3/d
# go back to this later
#3. calculate encounters (E)
#BM$E_BM <- BM$beta_BM*ViNum
BM
Differential settling
#Differential settling (DS)
#1. read in PIC data
library(readr) #always use readr not baseR
setwd("D:/R program")
The working directory was changed to D:/R program inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
PIC <- read_csv("Postdoc-R/CSV Files/PIC.csv")
Parsed with column specification:
cols(
Strain = [31mcol_character()[39m,
Replicate = [32mcol_double()[39m,
TC = [32mcol_double()[39m,
AC = [32mcol_double()[39m,
Cellcount = [32mcol_double()[39m
)
PIC$Strain <- as.factor(PIC$Strain)
PIC$Replicate <- as.factor(PIC$Replicate)
#2. calculate PIC for cell
PIC$PIC <- PIC$TC-PIC$AC
PIC$PICpercell <- (PIC$PIC/PIC$Cellcount)*(10)^-3#in g
PIC$PICpercellpg <- PIC$PICpercell*1e12
ggplotly(ggplot(data=PIC, aes(x=Strain, y=PICpercellpg)) + geom_boxplot()+geom_point(size=2) +theme_Publication())
#3a. calculate density of cells (den_cell)
PIC <- mutate(PIC, group = ifelse(PICpercellpg < 4 , "Nc", "Cc"))
ggplotly(ggplot(data=PIC, aes(x=group, y=PICpercellpg)) + geom_boxplot()+geom_point(size=2, aes(color=Strain))+ theme_Publication())
plotly.js does not (yet) support horizontal legend items
You can track progress here:
https://github.com/plotly/plotly.js/issues/53
PIC <- mutate(PIC, rad = ifelse(group == "Nc" , 1.8E-6, 2.3E-6)) #in m
PIC$volume <- (4/3)*pi*(PIC$rad*100)^3 #in cm3
PIC$Den_cell <- PIC$PICpercell/PIC$volume #g/cm3
PIC$Den_celltotal <- PIC$Den_cell+Den_OcM
#remove strains 621, 623, 625
PIC <- PIC %>% filter (! Strain %in% c("621", "623", "655"))
ggplotly(ggplot(data=PIC, aes(x=Strain, y=Den_celltotal, color=group)) + geom_boxplot()+geom_point(size=2)
+theme_Publication())
plotly.js does not (yet) support horizontal legend items
You can track progress here:
https://github.com/plotly/plotly.js/issues/53
#some strains that are "naked" have PIC<2. I chose to ignore this since in the lm model I do not use
#strain as a factor, rather data is treated as a whole (e.g., no grouping)
#assume grouping
PIC$group2 <- case_when(
PIC$PICpercellpg <2 ~ "Nc",
PIC$PICpercellpg >2 & PIC$PICpercellpg < 4 ~ "Nc/Cc uncertain",
PIC$PICpercellpg >4 & PIC$PICpercellpg < 10 ~ "Cc-Mc",
PIC$PICpercellpg >10 ~ "Cc-Hc",
TRUE ~ as.character(PIC$PICpercellpg)
)
PIC$group2 <- factor (PIC$group2,levels= c("Nc", "Nc/Cc uncertain", "Cc-Mc", "Cc-Hc"),
labels = c("Nc", "Nc/Cc uncertain", "Cc-Mc", "Cc-Hc"))
#remove Nc/Cc uncertain
PIC <- PIC %>% filter (! group2 %in% c("Nc/Cc uncertain"))
#3b. calculate PIC and density for lith
PIC$perlith <- PIC$PICpercell/14 #in g, assuming 20 liths attached
PIC$perlithpg <- PIC$perlith*1e12 #in pg
PIC$Denlith <- case_when(
PIC$group2 == "Nc" ~ 1.025,
PIC$group2 == "Cc-Mc" ~ 2,
PIC$group2 == "Cc-Hc" ~ 2.6)
ggplotly(ggplot(data=PIC, aes(x=group, y=Denlith, color=group)) + geom_boxplot()+geom_point(size=2)
+theme_Publication())
plotly.js does not (yet) support horizontal legend items
You can track progress here:
https://github.com/plotly/plotly.js/issues/53
#4. calculate sinking velocity of cells,liths, and viruses
PIC$SinkVel <- ((2*((PIC$rad*100)^2)*(981)*(PIC$Den_celltotal-Den_CH2O))/(9*(mu*10)))*864 #meter per day
PIC$SinkVel_lith <- ((2*((Reh_lith*100)^2)*(981)*(PIC$Denlith-Den_CH2O))/(9*(mu*10)))*864 #meter per day
Ehv_SinkVel <- ((2*((Rehv*100)^2)*(981)*(Den_CH2O-Den_CH2O))/(9*(mu*10)))*864 #equals to 0
#g is converted to per day, 864 is the one that converts cm/s to m/day
#plot sinking velocity vs calcification
ggplot(data=PIC, aes(x=PICpercellpg, y=SinkVel, color=Strain, shape=group2)) + geom_point(size=5)+theme_Publication()+
labs(y = expression("Sinking velocity"~("m"~day^-1)), x = expression("PIC pg"~cell^-1)) +
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank())

ggplot(data=PIC %>% filter (group=="Cc"), aes(x=perlithpg, y=SinkVel_lith, color=group2, shape=group2)) +
geom_point(size=5)+theme_Publication()+
labs(y = expression("Sinking velocity"~("m"~day^-1)), x = expression("PIC pg"~lith^-1)) +
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank())

#5. calculate betas for DS
PIC$beta_DS <-(pi*(((PIC$rad+Rehv)*100)^2)*(abs((PIC$SinkVel-Ehv_SinkVel)/864)))*86400 #in encounters cm3/day
PIC$beta_DS_lith <- (pi*(((Reh_lith+Rehv)*100)^2)*(abs((PIC$SinkVel_lith-Ehv_SinkVel)/864)))*86400 #in encounters cm3/day
ggplot(data=PIC, aes(x=PICpercellpg, y=beta_DS, color=Strain, shape=group2)) + geom_point(size=5)+theme_Publication()+
labs(y = expression(beta~("Encounters"~cm^3~day^-1)), x = expression("PIC pg"~cell^-1)) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l")+
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank())

ggplot(data=PIC %>% filter (group=="Cc"), aes(x=perlithpg, y=beta_DS_lith, color=group2, shape=group2)) + geom_point(size=5)+theme_Publication()+
labs(y = expression(beta~("Encounters"~cm^3~day^-1)), x = expression("PIC pg"~lith^-1)) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l")+
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank())

#6. calculate encounters
PIC$E_DS <- (PIC$beta_DS*ViNum) #E calculated with Virus for cell
PIC$E_DS_lith <- (PIC$beta_DS_lith*ViNum) #E calculated with Virus for lith
ggplotly(ggplot(data=PIC, aes(x=PICpercellpg, y=log10(E_DS))) +geom_point(size=5, aes(shape=group2, color=Strain)) +
theme_Publication() + geom_smooth())
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
plotly.js does not (yet) support horizontal legend items
You can track progress here:
https://github.com/plotly/plotly.js/issues/53
ggplot(data=PIC, aes(x=PICpercellpg, y=E_DS, color=Strain, shape=group2)) + geom_point(size=5)+theme_Publication()+
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l") +
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) +
labs(y = expression("viral encounters " ~ day^-1~cell^-1), x = expression("PIC pg"~cell^-1))

ggplot(data=PIC %>% filter (group =="Cc"), aes(x=perlithpg, y=E_DS_lith, color=Strain, shape=group2)) + geom_point(size=5)+theme_Publication()+
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l") +
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) +
labs(y = expression("viral encounters " ~ day^-1~cell^-1), x = expression("PIC pg"~lith^-1))

#7. summaries
library(plyr)
summary_DS <- ddply(PIC, .(Strain), summarize, PICpercellpg=mean(PICpercellpg), perlithpg = mean(perlithpg),
Den_celltotal = mean (Den_celltotal),SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS),
Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith),
beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS_bygroup2 <- ddply(PIC, .(group2), summarize, PICpercellpg=mean(PICpercellpg), perlithpg = mean(perlithpg),
Den_celltotal = mean (Den_celltotal), SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS),
Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith),
beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS_bygroup <- ddply(PIC, .(group),
summarize, PICpercellpg=mean(PICpercellpg), perlithpg = mean(perlithpg),
Den_celltotal = mean (Den_celltotal), SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS),
Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith),
beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS
summary_DS_bygroup2
summary_DS_bygroup
setwd("D:/R program")
The working directory was changed to D:/R program inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
require(openxlsx)
write.xlsx(summary_DS, file = "Postdoc-R/Exported Tables/summary_DS_master4.xlsx")
write.xlsx(summary_DS_bygroup, file = "Postdoc-R/Exported Tables/summary_DS_bygroup_master4.xlsx")
write.xlsx(summary_DS_bygroup2, file = "Postdoc-R/Exported Tables/summary_DS_bygroup2_master4.xlsx")
#8. regressions
#8a. PIC and beta_DS of cells
PIC_beta_reg <- lm (beta_DS ~ PICpercellpg, data=PIC)
summary(PIC_beta_reg)
Call:
lm(formula = beta_DS ~ PICpercellpg, data = PIC)
Residuals:
Min 1Q Median 3Q Max
-1.322e-07 -7.329e-08 -2.830e-08 8.065e-08 1.657e-07
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.496e-07 2.295e-08 6.52 3.29e-07 ***
PICpercellpg 3.285e-07 2.529e-09 129.90 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9.621e-08 on 30 degrees of freedom
Multiple R-squared: 0.9982, Adjusted R-squared: 0.9982
F-statistic: 1.688e+04 on 1 and 30 DF, p-value: < 2.2e-16
plot(residuals.lm(PIC_beta_reg))

coef(PIC_beta_reg)
(Intercept) PICpercellpg
1.496035e-07 3.284952e-07
cor(PIC$PICpercellpg, PIC$beta_DS)
[1] 0.9991123
#8d other regressions, change this
PIC_Den_celltotal_reg <- lm(Den_celltotal ~ PICpercellpg, data=PIC)
PIC_SinkVel_reg <- lm (SinkVel ~ PICpercellpg, data=PIC)
plot(residuals.lm(PIC_Den_celltotal_reg))

plot(residuals.lm(PIC_SinkVel_reg))

#9. make a prediction based on PIC values
#9a. for cells
require(truncnorm)
require(Rmisc)
summary(PIC$PICpercellpg)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.4144 0.5770 3.2591 6.0910 9.2665 20.1442
summarySE(data=PIC, measurevar="PICpercellpg")
#make new dataframe
PIC_newdata <- as.data.frame(rtruncnorm(n=10000, a=-0.4, b=20.14, mean=6.09, sd=6.83))
#rename column. rename function in plyr
library(plyr)
PIC_newdata <- plyr::rename (PIC_newdata, c ("rtruncnorm(n = 10000, a = -0.4, b = 20.14, mean = 6.09, sd = 6.83)" = "PICpercellpg"))
PIC_newdata <- mutate(PIC_newdata, group = ifelse(PICpercellpg < 4 , "Nc", "Cc"))
ggplotly(ggplot(data=PIC_newdata, aes(x=group, y=PICpercellpg)) + geom_boxplot()+geom_point(size=2) +
theme_Publication())
PIC_newdata$group2 <- case_when(
PIC_newdata$PICpercellpg <2 ~ "Nc",
PIC_newdata$PICpercellpg >2 & PIC_newdata$PICpercellpg < 4 ~ "Nc/Cc uncertain",
PIC_newdata$PICpercellpg >4 & PIC_newdata$PICpercellpg < 10 ~ "Cc-Mc",
PIC_newdata$PICpercellpg >10 ~ "Cc-Hc",
TRUE ~ as.character(PIC_newdata$PICpercellpg)
)
PIC_newdata$group2 <- factor (PIC_newdata$group2,levels= c("Nc", "Nc/Cc uncertain", "Cc-Mc", "Cc-Hc"), labels = c("Nc", "Nc/Cc uncertain", "Cc-Mc", "Cc-Hc"))
#remove Nc/Cc uncertain
PIC_newdata <- PIC_newdata %>% filter (! group2 %in% c("Nc/Cc uncertain"))
#calculate new stuff
PIC_newdata <- mutate(PIC_newdata, rad = ifelse(group == "Nc" , 1.8E-6, 2.3E-6)) #in m
PIC_newdata$volume <- (4/3)*pi*(PIC_newdata$rad*100)^3 #in cm3
PIC_newdata$Den_cell <- (PIC_newdata$PICpercell*1e-12)/PIC_newdata$volume #g/cm3
PIC_newdata$Den_celltotal <- PIC_newdata$Den_cell+Den_OcM
ggplotly(ggplot(data=PIC, aes(x=Strain, y=Den_celltotal, color=group)) + geom_boxplot()+geom_point(size=2)
+theme_Publication())
plotly.js does not (yet) support horizontal legend items
You can track progress here:
https://github.com/plotly/plotly.js/issues/53
#3b. calculate PIC and density for lith
PIC_newdata$perlithpg <- PIC_newdata$PICpercellpg/14 #in g, assuming 20 liths attached
PIC_newdata$Denlith <- case_when(
PIC_newdata$group2 == "Nc" ~ 1.025,
PIC_newdata$group2 == "Nc/Cc uncertain" ~ 1.025,
PIC_newdata$group2 == "Cc-Mc" ~ 2,
PIC_newdata$group2 == "Cc-Hc" ~ 2.6)
ggplotly(ggplot(data=PIC_newdata, aes(x=group, y=Denlith, color=group)) + geom_boxplot()+geom_point(size=2)
+theme_Publication())
plotly.js does not (yet) support horizontal legend items
You can track progress here:
https://github.com/plotly/plotly.js/issues/53
#4. calculate sinking velocity of cells,liths, and viruses
PIC_newdata$SinkVel <- ((2*((PIC_newdata$rad*100)^2)*(981)*(PIC_newdata$Den_celltotal-Den_CH2O))/(9*(mu*10)))*864 #meter per day
PIC_newdata$SinkVel_lith <- ((2*((Reh_lith*100)^2)*(981)*(PIC_newdata$Denlith-Den_CH2O))/(9*(mu*10)))*864 #meter per day
Ehv_SinkVel <- ((2*((Rehv*100)^2)*(981)*(Den_CH2O-Den_CH2O))/(9*(mu*10)))*864 #equals to 0
#g is converted to per day, 864 is the one that converts cm/s to m/day
#plot sinking velocity vs calcification
ggplot(data=PIC_newdata, aes(x=PICpercellpg, y=SinkVel, color=group2, shape=group2)) + geom_point(size=5)+theme_Publication()+
labs(y = expression("Sinking velocity"~("m"~day^-1)), x = expression("PIC pg"~cell^-1)) +
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank())

ggplot(data=PIC_newdata %>% filter (group=="Cc"), aes(x=perlithpg, y=SinkVel_lith, color=group2, shape=group2)) +
geom_point(size=5)+theme_Publication()+
labs(y = expression("Sinking velocity"~("m"~day^-1)), x = expression("PIC pg"~lith^-1)) +
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank())

#5. calculate betas for DS
PIC_newdata$beta_DS <-(pi*(((PIC_newdata$rad+Rehv)*100)^2)*(abs((PIC_newdata$SinkVel-Ehv_SinkVel)/864)))*86400 #in encounters cm3/day
PIC_newdata$beta_DS_lith <- (pi*(((Reh_lith+Rehv)*100)^2)*(abs((PIC_newdata$SinkVel_lith-Ehv_SinkVel)/864)))*86400 #in encounters cm3/day
ggplot(data=PIC_newdata %>% filter (!(group2 %in% c("Nc/Cc uncertain"))), aes(x=PICpercellpg , y=beta_DS, color=group2, shape=group2)) + geom_point(size=5)+theme_Publication()+
labs(y = expression(beta~("Encounters"~cm^3~day^-1)), x = expression("PIC pg"~cell^-1)) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l")+
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank())

ggplot(data=PIC_newdata %>% filter (group=="Cc"), aes(x=perlithpg, y=beta_DS_lith, color=group2, shape=group2)) + geom_point(size=5)+theme_Publication()+
labs(y = expression(beta~("Encounters"~cm^3~day^-1)), x = expression("PIC pg"~lith^-1)) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l")+
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank())

#6. calculate encounters
PIC_newdata$E_DS <- (PIC_newdata$beta_DS*ViNum) #E calculated with Virus for cell
PIC_newdata$E_DS_lith <- (PIC_newdata$beta_DS_lith*ViNum) #E calculated with Virus for lith
ggplotly(ggplot(data=PIC_newdata, aes(x=PICpercellpg, y=log10(E_DS))) +geom_point(size=5, aes(shape=group2, color=group2)) +
theme_Publication() + geom_smooth())
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
plotly.js does not (yet) support horizontal legend items
You can track progress here:
https://github.com/plotly/plotly.js/issues/53
#remove strains 621, 623, 625
ggplot(data=PIC_newdata , aes(x=PICpercellpg, y=E_DS, color=group2, shape=group2)) + geom_point(size=5)+theme_Publication()+
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l") +
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) +
labs(y = expression("viral encounters " ~ day^-1~cell^-1), x = expression("PIC pg"~cell^-1))

ggplot(data=PIC_newdata %>% filter (group =="Cc"), aes(x=perlithpg, y=E_DS_lith, color=group2, shape=group2)) + geom_point(size=5)+theme_Publication()+
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l") +
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) +
labs(y = expression("viral encounters " ~ day^-1~cell^-1), x = expression("PIC pg"~lith^-1))

#10. calculate encounters based on newdata
summary_DS_bygroup<-ddply(PIC %>% filter (!(group2 %in% c("naked/calcified uncertain"))), .(group), summarize, PICpercellpg=mean(PICpercellpg),
perlithpg = mean(perlithpg), Den_celltotal = mean (Den_celltotal),
SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS),
Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith),
beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS_bygroup
#remove naked/calcified uncertain
summary_DS_bygroup_pred <-ddply(PIC_newdata, .(group), summarize, PICpercellpg=mean(PICpercellpg),
perlithpg = mean(perlithpg), Den_celltotal = mean (Den_celltotal),
SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS),
Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith),
beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS_bygroup_pred
summary_DS_bygroup2_pred <-ddply(PIC_newdata, .(group2), summarize, PICpercellpg=mean(PICpercellpg),
perlithpg = mean(perlithpg), Den_celltotal = mean (Den_celltotal),
SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS),
Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith),
beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS_bygroup2_pred
summarySE (PIC_newdata %>% filter (!(group2 %in% c("naked/calcified uncertain"))),
measurevar = "PICpercellpg", groupvars = c("group"))
setwd("D:/R program")
The working directory was changed to D:/R program inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
write.xlsx(summary_DS_bygroup_pred, file = "Postdoc-R/Exported Tables/summary_DS_bygroup_pred_master4.xlsx")
#arrange the summary
DS_pred <- data.frame (group=as.factor(c("Nc", "Cc", "Cc", "Li", "Li")), group2 =c("Nc", "Cc-Mc", "Cc-Hc", "Li-Mc", "Li-Hc"), PIC = c(0.8767253, 6.9482471, 13.6411818, 0.49630336, 0.97437013), Den = c(1.085889, 1.186334, 1.317658, 2, 2.6), SinkVel = c(0.03299996,0.14276197, 0.25896889, 0.2756279, 0.4452451), beta_DS = c(3.703283e-07, 2.561877e-06, 4.647220e-06, 1.673026e-06, 2.702580e-06))
DS_pred
Turbulence

melt all data and make a table with all
all <- Reduce(function(x,y) merge(x,y,by="group2",all=TRUE) ,
list(BM %>% select (group2, beta_BM), DS_pred %>% select (group2, beta_DS),
turb %>% select (group2, disrate, beta_turb)))
all$beta_all <- all$beta_BM + all$beta_DS + all$beta_turb
all$E_all_low <- all$beta_all*(10^4)
all$E_all_high <- all$beta_all*(10^6)
all$group <- factor (all$group,levels= c("Nc", "Cc-Mc", "Cc-Hc", "Li-Mc", "Li-Hc"), labels = c("Nc", "Cc", "Cc", "Li", "Li"))
#make summary so that you can have CI
#plotting
ggplot(data=all, aes(x=disrate,y = E_all_low, color=group, fill=group)) +
geom_smooth(size=1, position=position_jitter(w=0.02, h=0), aes(linetype="10^3", fill=group))+
geom_smooth(data = all, aes(y= E_all_high, color=group,fill=group, linetype="10^5")) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
labels = scales::trans_format("log10", scales::math_format(10^.x))) +
scale_x_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=7),
labels = scales::trans_format("log10", scales::math_format(10^.x))) +
annotation_logticks()+
theme_Publication() +
theme(legend.title = element_blank(), legend.key.width=unit(1,"cm"))+
labs(y = expression("viral encounters " ~day^-1~cell^-1), x = expression("dissipation rate "~(m^2~s^-3))) +
theme(legend.title = element_blank())

---
title: "beta kernel for paper"
output: html_notebook
---

This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 

This version of beta calculation is used for the theoretical method/results of the paper. For reference: the last update was Dec 2018 for an R notebook. This R notebook is an updated and more sophisticated version. 

Constants used are listed at the start of this script.

```{r}
setwd("D:/R program")
source("inspack.R")
#values needed 

K= 1.38064852*(10)^-23 #m2 kg/ s2 K boltzmann constant
mu= 1.126*(10)^-3 #kg/m s dynamic viscosity in 18C
v= 1.099*(10)^-6 #m2/s kinematic viscosity in 18C
Reh_calc= 2.3E-6 #in m radius Ehux
Reh_naked= 1.8E-6 #in m radius Ehux
Reh_lith = 1.3E-6 #in m radius
Rehv= 90*(10)^-9 #in m radius virus
Temp = 18+273.15 #temp in kelvin, here assuming 18C
Den_OcM = 1.05 #g/cm3 density organic cell matter
Den_CH2O= 1.025 #g/cm3 density seawater at 18C
CcNum <- 0.9*((10)^3)
NcNum <- 0.1*((10)^3)
ViNum <- (CcNum+NcNum)*10
LiNum <- CcNum*10
```

Calculating for Brownian motion

```{r}
#Brownian motion (BM)
#1. make a data frame
BM <- data.frame (group=as.factor(c("Nc", "Cc", "Cc", "Li", "Li")), group2 =c("Nc", "Cc-Mc", "Cc-Hc", "Li-Mc", "Li-Hc"), rad= c(Reh_naked, Reh_calc, Reh_calc, Reh_lith, Reh_lith)) 

#2. calculate beta (beta)
BM$beta_BM <- ((2*(K*(10)^4)*Temp*(((BM$rad+Rehv)*100)^2))/((3*mu*10)*(BM$rad*Rehv*1e4)))*86400 #cm3/d

# go back to this later
#3. calculate encounters (E)
#BM$E_BM <- BM$beta_BM*ViNum

BM
```

Differential settling
```{r}
#Differential settling (DS)
#1. read in PIC data
library(readr) #always use readr not baseR

setwd("D:/R program")
PIC <- read_csv("Postdoc-R/CSV Files/PIC.csv")

PIC$Strain <- as.factor(PIC$Strain)
PIC$Replicate <- as.factor(PIC$Replicate)

#2. calculate PIC for cell
PIC$PIC <- PIC$TC-PIC$AC
PIC$PICpercell <- (PIC$PIC/PIC$Cellcount)*(10)^-3#in g
PIC$PICpercellpg <- PIC$PICpercell*1e12

ggplotly(ggplot(data=PIC, aes(x=Strain, y=PICpercellpg)) + geom_boxplot()+geom_point(size=2) +theme_Publication())

#3a. calculate density of cells (den_cell)
PIC <- mutate(PIC, group = ifelse(PICpercellpg < 4 , "Nc", "Cc"))

ggplotly(ggplot(data=PIC, aes(x=group, y=PICpercellpg)) + geom_boxplot()+geom_point(size=2, aes(color=Strain))+ theme_Publication())

PIC <- mutate(PIC, rad = ifelse(group == "Nc" , 1.8E-6,  2.3E-6)) #in m

PIC$volume <- (4/3)*pi*(PIC$rad*100)^3 #in cm3
PIC$Den_cell <- PIC$PICpercell/PIC$volume #g/cm3
PIC$Den_celltotal <- PIC$Den_cell+Den_OcM

#remove strains 621, 623, 625
PIC <- PIC %>% filter (! Strain %in% c("621", "623", "655"))

ggplotly(ggplot(data=PIC, aes(x=Strain, y=Den_celltotal, color=group)) + geom_boxplot()+geom_point(size=2) 
         +theme_Publication())
#some strains that are "naked" have PIC<2. I chose to ignore this since in the lm model I do not use
#strain as a factor, rather data is treated as a whole (e.g., no grouping)


```
```{r}
#assume grouping
PIC$group2 <- case_when(
  PIC$PICpercellpg <2  ~ "Nc",
  PIC$PICpercellpg >2 & PIC$PICpercellpg < 4 ~ "Nc/Cc uncertain",
  PIC$PICpercellpg >4 & PIC$PICpercellpg < 10 ~ "Cc-Mc",
  PIC$PICpercellpg >10 ~ "Cc-Hc", 
  TRUE ~ as.character(PIC$PICpercellpg)
)

PIC$group2 <- factor (PIC$group2,levels= c("Nc", "Nc/Cc uncertain", "Cc-Mc", "Cc-Hc"),
                                                       labels =  c("Nc", "Nc/Cc uncertain", "Cc-Mc", "Cc-Hc"))

#remove Nc/Cc uncertain
PIC <- PIC %>% filter (! group2 %in% c("Nc/Cc uncertain"))


#3b. calculate PIC and density for lith
PIC$perlith <- PIC$PICpercell/14 #in g, assuming 20 liths attached
PIC$perlithpg <- PIC$perlith*1e12 #in pg
PIC$Denlith <- case_when(
  PIC$group2 == "Nc"  ~ 1.025, 
  PIC$group2 == "Cc-Mc" ~ 2,
  PIC$group2 == "Cc-Hc" ~ 2.6)

ggplotly(ggplot(data=PIC, aes(x=group, y=Denlith, color=group)) + geom_boxplot()+geom_point(size=2) 
         +theme_Publication())

```

```{r}
#4. calculate sinking velocity of cells,liths, and viruses
PIC$SinkVel <- ((2*((PIC$rad*100)^2)*(981)*(PIC$Den_celltotal-Den_CH2O))/(9*(mu*10)))*864 #meter per day
PIC$SinkVel_lith <- ((2*((Reh_lith*100)^2)*(981)*(PIC$Denlith-Den_CH2O))/(9*(mu*10)))*864 #meter per day
Ehv_SinkVel <- ((2*((Rehv*100)^2)*(981)*(Den_CH2O-Den_CH2O))/(9*(mu*10)))*864  #equals to 0
#g is converted to per day, 864 is the one that converts cm/s to m/day
#plot sinking velocity vs calcification


ggplot(data=PIC, aes(x=PICpercellpg, y=SinkVel, color=Strain, shape=group2)) + geom_point(size=5)+theme_Publication()+
    labs(y = expression("Sinking velocity"~("m"~day^-1)), x = expression("PIC pg"~cell^-1)) +
    theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) 

ggplot(data=PIC %>% filter (group=="Cc"), aes(x=perlithpg, y=SinkVel_lith, color=group2, shape=group2)) + 
  geom_point(size=5)+theme_Publication()+
  labs(y = expression("Sinking velocity"~("m"~day^-1)), x = expression("PIC pg"~lith^-1)) +
    theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) 


```
```{r}
#5. calculate betas for DS
PIC$beta_DS <-(pi*(((PIC$rad+Rehv)*100)^2)*(abs((PIC$SinkVel-Ehv_SinkVel)/864)))*86400 #in encounters cm3/day
PIC$beta_DS_lith <- (pi*(((Reh_lith+Rehv)*100)^2)*(abs((PIC$SinkVel_lith-Ehv_SinkVel)/864)))*86400 #in encounters cm3/day

ggplot(data=PIC, aes(x=PICpercellpg, y=beta_DS, color=Strain,  shape=group2)) + geom_point(size=5)+theme_Publication()+
  labs(y = expression(beta~("Encounters"~cm^3~day^-1)), x = expression("PIC pg"~cell^-1))  +
scale_y_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l")+
      theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) 

ggplot(data=PIC %>% filter (group=="Cc"), aes(x=perlithpg, y=beta_DS_lith, color=group2, shape=group2)) + geom_point(size=5)+theme_Publication()+
  labs(y = expression(beta~("Encounters"~cm^3~day^-1)), x = expression("PIC pg"~lith^-1))  +
scale_y_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l")+
      theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) 

```


```{r}
#6. calculate encounters
PIC$E_DS <- (PIC$beta_DS*ViNum) #E calculated with Virus for cell
PIC$E_DS_lith <- (PIC$beta_DS_lith*ViNum) #E calculated with Virus for lith

ggplotly(ggplot(data=PIC, aes(x=PICpercellpg, y=log10(E_DS))) +geom_point(size=5, aes(shape=group2, color=Strain)) +
    theme_Publication() + geom_smooth())

ggplot(data=PIC, aes(x=PICpercellpg, y=E_DS, color=Strain,  shape=group2)) + geom_point(size=5)+theme_Publication()+
  scale_y_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l") +
      theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) +
  labs(y = expression("viral encounters " ~ day^-1~cell^-1), x = expression("PIC pg"~cell^-1))

ggplot(data=PIC %>% filter (group =="Cc"), aes(x=perlithpg, y=E_DS_lith, color=Strain,  shape=group2)) + geom_point(size=5)+theme_Publication()+
  scale_y_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l") +
      theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) +
  labs(y = expression("viral encounters " ~ day^-1~cell^-1), x = expression("PIC pg"~lith^-1))
```

```{r}
#7. summaries
library(plyr)
summary_DS <- ddply(PIC, .(Strain), summarize,  PICpercellpg=mean(PICpercellpg), perlithpg = mean(perlithpg), 
                    Den_celltotal = mean (Den_celltotal),SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS),
                    Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith), 
                    beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS_bygroup2 <- ddply(PIC, .(group2), summarize,  PICpercellpg=mean(PICpercellpg), perlithpg = mean(perlithpg), 
                    Den_celltotal = mean (Den_celltotal), SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS),
                    Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith), 
                    beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))

summary_DS_bygroup <- ddply(PIC, .(group), 
                            summarize,  PICpercellpg=mean(PICpercellpg), perlithpg = mean(perlithpg), 
                    Den_celltotal = mean (Den_celltotal), SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS),
                    Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith), 
                    beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS
summary_DS_bygroup2
summary_DS_bygroup
setwd("D:/R program")
require(openxlsx)
write.xlsx(summary_DS, file = "Postdoc-R/Exported Tables/summary_DS_master4.xlsx")
write.xlsx(summary_DS_bygroup, file = "Postdoc-R/Exported Tables/summary_DS_bygroup_master4.xlsx")
write.xlsx(summary_DS_bygroup2, file = "Postdoc-R/Exported Tables/summary_DS_bygroup2_master4.xlsx")
```

```{r}
#8. regressions
#8a. PIC and beta_DS of cells 
PIC_beta_reg <- lm (beta_DS ~ PICpercellpg, data=PIC)
summary(PIC_beta_reg)
plot(residuals.lm(PIC_beta_reg))
coef(PIC_beta_reg)
cor(PIC$PICpercellpg, PIC$beta_DS)

#8d other regressions, change this
PIC_Den_celltotal_reg <- lm(Den_celltotal ~ PICpercellpg, data=PIC)
PIC_SinkVel_reg <- lm (SinkVel ~ PICpercellpg, data=PIC)

plot(residuals.lm(PIC_Den_celltotal_reg))
plot(residuals.lm(PIC_SinkVel_reg))

```

```{r}
#9. make a prediction based on PIC values
#9a. for cells
require(truncnorm)
require(Rmisc)
summary(PIC$PICpercellpg)
summarySE(data=PIC, measurevar="PICpercellpg")

#make new dataframe
PIC_newdata <- as.data.frame(rtruncnorm(n=10000, a=-0.4, b=20.14, mean=6.09, sd=6.83))
#rename column. rename function in plyr 
library(plyr)
PIC_newdata <- plyr::rename (PIC_newdata, c ("rtruncnorm(n = 10000, a = -0.4, b = 20.14, mean = 6.09, sd = 6.83)" = "PICpercellpg"))
PIC_newdata <- mutate(PIC_newdata, group = ifelse(PICpercellpg < 4 , "Nc", "Cc"))
ggplotly(ggplot(data=PIC_newdata, aes(x=group, y=PICpercellpg)) + geom_boxplot()+geom_point(size=2) +
           theme_Publication())

```
```{r}
PIC_newdata$group2 <- case_when(
  PIC_newdata$PICpercellpg <2  ~ "Nc",
  PIC_newdata$PICpercellpg >2 & PIC_newdata$PICpercellpg < 4 ~ "Nc/Cc uncertain",
  PIC_newdata$PICpercellpg >4 & PIC_newdata$PICpercellpg < 10 ~ "Cc-Mc",
  PIC_newdata$PICpercellpg >10 ~ "Cc-Hc", 
  TRUE ~ as.character(PIC_newdata$PICpercellpg)
)

PIC_newdata$group2 <- factor (PIC_newdata$group2,levels= c("Nc", "Nc/Cc uncertain", "Cc-Mc", "Cc-Hc"), labels =  c("Nc", "Nc/Cc uncertain", "Cc-Mc", "Cc-Hc"))

#remove Nc/Cc uncertain
PIC_newdata <- PIC_newdata %>% filter (! group2 %in% c("Nc/Cc uncertain"))

#calculate new stuff

PIC_newdata <- mutate(PIC_newdata, rad = ifelse(group == "Nc" , 1.8E-6,  2.3E-6)) #in m

PIC_newdata$volume <- (4/3)*pi*(PIC_newdata$rad*100)^3 #in cm3
PIC_newdata$Den_cell <- (PIC_newdata$PICpercell*1e-12)/PIC_newdata$volume #g/cm3
PIC_newdata$Den_celltotal <- PIC_newdata$Den_cell+Den_OcM

ggplotly(ggplot(data=PIC, aes(x=Strain, y=Den_celltotal, color=group)) + geom_boxplot()+geom_point(size=2) 
         +theme_Publication())


#3b. calculate PIC and density for lith
PIC_newdata$perlithpg <- PIC_newdata$PICpercellpg/14 #in g, assuming 20 liths attached
PIC_newdata$Denlith <- case_when(
  PIC_newdata$group2 == "Nc"  ~ 1.025, 
  PIC_newdata$group2 == "Nc/Cc uncertain" ~ 1.025,
  PIC_newdata$group2 == "Cc-Mc" ~ 2,
  PIC_newdata$group2 == "Cc-Hc" ~ 2.6)

ggplotly(ggplot(data=PIC_newdata, aes(x=group, y=Denlith, color=group)) + geom_boxplot()+geom_point(size=2) 
         +theme_Publication())

#4. calculate sinking velocity of cells,liths, and viruses
PIC_newdata$SinkVel <- ((2*((PIC_newdata$rad*100)^2)*(981)*(PIC_newdata$Den_celltotal-Den_CH2O))/(9*(mu*10)))*864 #meter per day
PIC_newdata$SinkVel_lith <- ((2*((Reh_lith*100)^2)*(981)*(PIC_newdata$Denlith-Den_CH2O))/(9*(mu*10)))*864 #meter per day
Ehv_SinkVel <- ((2*((Rehv*100)^2)*(981)*(Den_CH2O-Den_CH2O))/(9*(mu*10)))*864  #equals to 0
#g is converted to per day, 864 is the one that converts cm/s to m/day
#plot sinking velocity vs calcification


ggplot(data=PIC_newdata, aes(x=PICpercellpg, y=SinkVel, color=group2, shape=group2)) + geom_point(size=5)+theme_Publication()+
    labs(y = expression("Sinking velocity"~("m"~day^-1)), x = expression("PIC pg"~cell^-1)) +
    theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) 

ggplot(data=PIC_newdata %>% filter (group=="Cc"), aes(x=perlithpg, y=SinkVel_lith, color=group2, shape=group2)) + 
  geom_point(size=5)+theme_Publication()+
  labs(y = expression("Sinking velocity"~("m"~day^-1)), x = expression("PIC pg"~lith^-1)) +
    theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) 



```
```{r}

#5. calculate betas for DS
PIC_newdata$beta_DS <-(pi*(((PIC_newdata$rad+Rehv)*100)^2)*(abs((PIC_newdata$SinkVel-Ehv_SinkVel)/864)))*86400 #in encounters cm3/day
PIC_newdata$beta_DS_lith <- (pi*(((Reh_lith+Rehv)*100)^2)*(abs((PIC_newdata$SinkVel_lith-Ehv_SinkVel)/864)))*86400 #in encounters cm3/day

ggplot(data=PIC_newdata %>% filter (!(group2 %in% c("Nc/Cc uncertain"))), aes(x=PICpercellpg , y=beta_DS, color=group2,  shape=group2)) + geom_point(size=5)+theme_Publication()+
  labs(y = expression(beta~("Encounters"~cm^3~day^-1)), x = expression("PIC pg"~cell^-1))  +
scale_y_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l")+
      theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) 

ggplot(data=PIC_newdata %>% filter (group=="Cc"), aes(x=perlithpg, y=beta_DS_lith, color=group2, shape=group2)) + geom_point(size=5)+theme_Publication()+
  labs(y = expression(beta~("Encounters"~cm^3~day^-1)), x = expression("PIC pg"~lith^-1))  +
scale_y_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l")+
      theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) 
	  
#6. calculate encounters
PIC_newdata$E_DS <- (PIC_newdata$beta_DS*ViNum) #E calculated with Virus for cell
PIC_newdata$E_DS_lith <- (PIC_newdata$beta_DS_lith*ViNum) #E calculated with Virus for lith

ggplotly(ggplot(data=PIC_newdata, aes(x=PICpercellpg, y=log10(E_DS))) +geom_point(size=5, aes(shape=group2, color=group2)) +
    theme_Publication() + geom_smooth())
#remove strains 621, 623, 625

ggplot(data=PIC_newdata , aes(x=PICpercellpg, y=E_DS, color=group2,  shape=group2)) + geom_point(size=5)+theme_Publication()+
  scale_y_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l") +
      theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) +
  labs(y = expression("viral encounters " ~ day^-1~cell^-1), x = expression("PIC pg"~cell^-1))

ggplot(data=PIC_newdata %>% filter (group =="Cc"), aes(x=perlithpg, y=E_DS_lith, color=group2,  shape=group2)) + geom_point(size=5)+theme_Publication()+
  scale_y_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l") +
      theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank()) +
  labs(y = expression("viral encounters " ~ day^-1~cell^-1), x = expression("PIC pg"~lith^-1))
```

```{r}
#10. calculate encounters based on newdata

summary_DS_bygroup<-ddply(PIC %>% filter (!(group2 %in% c("naked/calcified uncertain"))), .(group), summarize,  PICpercellpg=mean(PICpercellpg), 
                    perlithpg = mean(perlithpg), Den_celltotal = mean (Den_celltotal),
                    SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS), 
                    Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith), 
                    beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS_bygroup

#remove naked/calcified uncertain

summary_DS_bygroup_pred <-ddply(PIC_newdata, .(group), summarize,  PICpercellpg=mean(PICpercellpg), 
                    perlithpg = mean(perlithpg), Den_celltotal = mean (Den_celltotal),
                    SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS), 
                    Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith), 
                    beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS_bygroup_pred

summary_DS_bygroup2_pred <-ddply(PIC_newdata, .(group2), summarize,  PICpercellpg=mean(PICpercellpg), 
                    perlithpg = mean(perlithpg), Den_celltotal = mean (Den_celltotal),
                    SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS), 
                    Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith), 
                    beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS_bygroup2_pred

summarySE (PIC_newdata %>% filter (!(group2 %in% c("naked/calcified uncertain"))), 
           measurevar = "PICpercellpg", groupvars = c("group"))


setwd("D:/R program")
write.xlsx(summary_DS_bygroup_pred, file = "Postdoc-R/Exported Tables/summary_DS_bygroup_pred_master4.xlsx")
```

```{r}
#arrange the summary
DS_pred <- data.frame (group=as.factor(c("Nc", "Cc", "Cc", "Li", "Li")), group2 =c("Nc", "Cc-Mc", "Cc-Hc", "Li-Mc", "Li-Hc"), PIC = c(0.8767253, 6.9482471, 13.6411818, 0.49630336, 0.97437013), Den = c(1.085889, 1.186334, 1.317658, 2, 2.6), SinkVel = c(0.03299996,0.14276197, 0.25896889, 0.2756279, 0.4452451), beta_DS = c(3.703283e-07, 2.561877e-06, 4.647220e-06, 1.673026e-06, 2.702580e-06))

DS_pred
```

Turbulence
```{r}
#Turbulence
#1. make new dataframe
#disrate is cm2/s3
#make data frame
turb <- expand.grid(list (disrate = (c (1 %o% 10^(seq(-8,-2, 0.5)))), group2 =c("Nc", "Cc-Mc", "Cc-Hc", "Li-Mc", "Li-Hc")))
turb$group <- factor (turb$group,levels= c("Nc", "Cc-Mc", "Cc-Hc", "Li-Mc", "Li-Hc"), labels = c("Nc", "Cc", "Cc", "Li", "Li"))

turb$rad <- case_when(
    turb$group =="Nc" ~ 1.8E-6,
    turb$group =="Cc" ~ 2.3E-6,
    turb$group =="Li" ~ 1.3E-6,
    TRUE ~ as.numeric(turb$group)
)
turb$disrate <- as.numeric(as.character(turb$disrate))

#2. calculate beta and encounters
turb$beta_turb <- (4.2*pi*((turb$disrate/(v*100^2))^0.5)*(((turb$rad+Rehv)*100)^3))*86400 
turb$E_turb <- (turb$beta_turb*ViNum) #E calculated with virus only

turb$group <- factor (turb$group,levels= c("Nc", "Cc", "Li"), labels = c("Nc", "Cc", "Li"))

ggplot(data = turb, aes(x = disrate, y = beta_turb, color=group)) + geom_point(size =5) +
  scale_y_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=4),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) +
  scale_x_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=4),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) +
  annotation_logticks() +
  theme_Publication() +
  labs(y = expression(beta~("predicted encounters " ~cm^3~day^-1)), 
       x = expression("dissipation rate "~(m^2~s^-3))) +
   theme(legend.title = element_blank())

ggplot(data = turb, aes(x = disrate, y = E_turb, color=group)) + geom_point(size =5) +
   scale_y_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=4),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) +
  scale_x_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=7),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) +
  annotation_logticks()+
  theme_Publication()+
  labs(y = expression("viral encounters " ~ cm^-3~day^-1),x = expression("dissipation rate "~(m^2~s^-3))) +
  theme(legend.title = element_blank())
```

melt all data and make a table with all
```{r}
all <- Reduce(function(x,y) merge(x,y,by="group2",all=TRUE) ,
              list(BM %>% select (group2, beta_BM), DS_pred %>% select (group2, beta_DS), 
                   turb %>% select (group2, disrate, beta_turb)))
all$beta_all <- all$beta_BM + all$beta_DS + all$beta_turb
all$E_all_low <- all$beta_all*(10^4)
all$E_all_high <- all$beta_all*(10^6)
all$group <- factor (all$group,levels= c("Nc", "Cc-Mc", "Cc-Hc", "Li-Mc", "Li-Hc"), labels = c("Nc", "Cc", "Cc", "Li", "Li"))

#make summary so that you can have CI


#plotting
ggplot(data=all, aes(x=disrate,y = E_all_low , color=group, fill=group)) + 
    geom_smooth(size=1, position=position_jitter(w=0.02, h=0), aes(linetype="10^3", fill=group))+
    geom_smooth(data = all, aes(y= E_all_high, color=group,fill=group, linetype="10^5")) +
    scale_y_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) +
    scale_x_log10(
        breaks = scales::trans_breaks("log10", function(x) 10^x, n=7),
        labels = scales::trans_format("log10", scales::math_format(10^.x))) +
    annotation_logticks()+
    theme_Publication() +
    theme(legend.title = element_blank(), legend.key.width=unit(1,"cm"))+
    labs(y = expression("viral encounters " ~day^-1~cell^-1), x = expression("dissipation rate "~(m^2~s^-3))) +
    theme(legend.title = element_blank())
```

